######################################################################
### Fig 8: Topic of Trade Bills Likely to be Lobbied
######################################################################
rm(list=ls())

## library
library(foreign)
library(dplyr)
library(stm)
library(mvtnorm)

## setting directories
DATA_DIR <- setwd(getwd())

### loading data
Bills <- read.csv(file.path(DATA_DIR, "trade_bills_summary.txt"), sep="\t", quote="")
## reports with trade bills lobbied
r1 <- read.csv(file.path(DATA_DIR, "trade_bills_reports.txt"), sep="\t", quote="")

a <- paste(Bills$session, Bills$billnumber, sep="_")
idx <- which(a %in% r1$bill)
Bills$lobbied <- rep("no",nrow(Bills))
Bills$lobbied[idx] <- "yes"
Bills$lobbied <- as.factor(Bills$lobbied)

## remove 6 bills sponsored by independent for a cleaner analysis
rm <- which(!Bills$part %in% c("Republican", "Democrat"))
bills <- Bills[-rm,]

## STM analysis on lobbied vs non-lobbied bills
processed <- textProcessor(bills$summary, metadata = bills)
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <-out$meta

bills$dum <- ifelse(bills$lobbied=="yes", 1,0)
summary(lm(dum~party, data=bills))

set.seed(02139)
k <- 4
Fit <- stm(out$documents, out$vocab, K = k,
                       prevalence = ~ lobbied + party,
                       data = out$meta, init.type = "Spectral")


texts <- processed$meta
orig <- as.character(texts$title)
short <- sapply(orig, function(x) as.character(substr(x, 1,200)))

labelTopics(Fit, 1:k)

thoughts1 <- findThoughts(Fit, texts = short,
                          n = 20, topics = 1)$docs[[1]]
thoughts2 <- findThoughts(Fit, texts = short,
                          n = 20, topics = 2)$docs[[1]]


out$meta$party <- as.factor(out$meta$party)
out$meta$lobbied <- as.factor(out$meta$lobbied)

prep <- estimateEffect(1:k ~ lobbied, Fit,
                       meta = out$meta, uncertainty = "Global")

topic1w <- labelTopics(Fit, 1:k)$prob[1,]
topic2w <- labelTopics(Fit, 1:k)$prob[2,]
topic3w <- labelTopics(Fit, 1:k)$prob[3,]
topic4w <- labelTopics(Fit, 1:k)$prob[4,]
topic1w <- paste("(", paste(topic1w, collapse=", "), ")", sep="")
topic2w <- paste("(", paste(topic2w, collapse=", "), ")", sep="")
topic3w <- paste("(", paste(topic3w, collapse=", "), ")", sep="")
topic4w <- paste("(", paste(topic4w, collapse=", "), ")", sep="")


pdf(file="./Figure8.pdf",
    width=15, height=7)

par(mfrow = c(1, 3), mar = c(4, .5, 5, .5), xpd=NA, cex.main=2)
plot.estimateEffect(prep, covariate = "lobbied", topics = 1:k,
                    model = Fit,
                    main ="",
                    xlim=c(-.5,.5), nsim=1000,
                    method = "difference",
                    cov.value1 = "yes", cov.value2 = "no",
                    labeltype="custom",
                    ylim=c(0.5,4),
                    custom.labels=rep("", k))

text(-0.35, 4, "Topic 1", col="red", cex=2.5)
text(0.25, 3, "Topic 2", col="darkgreen", cex=2.5)
text(0.20, 2, "Topic 3", col="black", cex=2.5)
text(0.18, 1, "Topic 4", col="black", cex=2.5)


text(0, 3.8, topic1w, cex=1.9, col="grey20")
text(0, 2.8, topic2w, cex=1.9, col="grey20")
text(0, 1.8, topic3w, cex=1.9, col="grey20")
text(0, 0.8, topic4w, cex=1.9, col="grey20")

text(0.4, 4.3, "Lobbied More", cex=2)
text(-0.4, 4.3, "Lobbied Less", cex=2)
arrows(-0.2, 4.3, 0.2, 4.3, code=3 ,xpd = TRUE, lwd=2)

plotQuote(thoughts1[c(1,2,7)], width = 40, text.cex=2,
          main = "Topic 1 (Less Likely to be Lobbied)")
plotQuote(thoughts2[c(1,2,17)], width = 40, text.cex=2,
          main = "Topic 2 (More Likely to be Lobbied)")

dev.off()
